% Read raw data file
data = dlmread('jtpa.raw');

% Subset to male observations
male = data(:,5) == 1;
data = data(male,:);

% Define main variables we'll use
y = data(:,2); % earnings
d = dummy(data(:,3)); % randomly assigned offer of job training indicator
maineffect{1} = dummy(data(:,6)); % hs grad
maineffect{2} = dummy(data(:,7)+2*data(:,8));  % 7 = black, 8 = hispanic
maineffect{3} = dummy(data(:,12:16)*((1:5)'));  % age2225, age2629, age3035, age3644, age4554
maineffect{4} = dummy(data(:,9));  % married
maineffect{5} = dummy(data(:,10));  % work less than 13 weeks
maineffect{6} = dummy(data(:,19));  % f2sms??

% Create all the interactions
Z = [maineffect{1},maineffect{2},maineffect{4},maineffect{5},maineffect{6},maineffect{3}];

[X,V,VUnique,nVU] = interaction_expansion(Z);

% Drop columns with very observations in either treatment or control state
nx = sum(X(d == 1,:) ~= 1);
X = X(:,nx > 5);

ndx = sum(X(d == 0,:) ~= 1);
X = X(:,ndx > 5);

% Drop columns with small variance
stdx0 = std(X(d==0,:));
stdx1 = std(X(d==1,:));
X = X(:,stdx0 > .1 & stdx1 > .1);

% Drop any columns that would lead to singularity in regressions within
% treatment or control subsets
%{
[~,Rx,ex] = qr(X(d == 1,:),0);
keepvar = ex(abs(diag(Rx)) > 1e-6);
X = X(:,keepvar);

[~,Rx,ex] = qr(X(d == 0,:),0);
keepvar = ex(abs(diag(Rx)) > 1e-6);
X = X(:,keepvar);
%}
% Keep the variables we care about
clearvars -except y d X XOLD V VUnique nVU ;
